package analyseText;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Extracts plain-text records from Weibo JSON dumps.
 *
 * <p>Two input layouts are handled, each expected to be a JSON array:
 * <ul>
 *   <li>"Users" data: every element has {@code id}, {@code name} and a nested
 *       {@code status} object with a {@code text} field. Each element becomes
 *       one output line {@code id<TAB>name<TAB>text}.</li>
 *   <li>"Statuses" data: every element has a {@code text} field. Each element
 *       becomes one output line containing that text.</li>
 * </ul>
 *
 * <p>Errors (I/O or JSON) are reported with {@code printStackTrace()} and are
 * not propagated to the caller.
 *
 * <p>Example:
 * <pre>
 *   WeiboFenXi f = new WeiboFenXi();
 *   f.statusesTextdoc("Statuses/10408.txt", "10408_text.txt");
 *   f.userTextDoc("Users/10411.txt", "10411_text.txt");
 * </pre>
 */
public class WeiboFenXi {

	/**
	 * Reads a "Users" JSON file and appends one {@code id<TAB>name<TAB>text}
	 * line per array element to the destination file. Only the {@code text}
	 * of {@code status} is used; {@code retweeted_status} is not read.
	 *
	 * <p>The destination is opened in append mode, and only after the whole
	 * source has been read and parsed successfully, so a read or parse failure
	 * leaves the destination untouched.
	 *
	 * @param sourceFile path of the JSON file to analyse
	 * @param dirFile path of the file the extracted lines are appended to
	 */
	public void userTextDoc(String sourceFile, String dirFile) {
		try {
			String json = readAllLines(new File(sourceFile));
			StringBuilder records = new StringBuilder();
			appendUserRecords(json, records);
			writeText(dirFile, records.toString(), true);
		} catch (IOException e) {
			// Also covers FileNotFoundException (a subclass of IOException).
			e.printStackTrace();
		} catch (JSONException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Parses a "Users" JSON array given as a string and returns one
	 * {@code id<TAB>name<TAB>text} line per element.
	 *
	 * <p>If parsing fails part-way, the stack trace is printed and the lines
	 * built from the elements processed so far are returned.
	 *
	 * @param str JSON array text to process
	 * @return the extracted lines, each terminated by {@code "\n"}
	 */
	public String userTextStr(String str) {
		StringBuilder records = new StringBuilder();
		try {
			appendUserRecords(str, records);
		} catch (JSONException e) {
			e.printStackTrace();
		}
		return records.toString();
	}

	/**
	 * Reads a "Statuses" JSON file and writes the {@code text} of every array
	 * element, one per line, to the destination file.
	 *
	 * <p>The destination is overwritten (not appended to), and is opened only
	 * after the whole source has been read and parsed successfully, so a read
	 * or parse failure leaves the destination untouched.
	 *
	 * @param sourceFile path of the JSON file to analyse
	 * @param dirFile path of the file the extracted text is written to
	 */
	public void statusesTextdoc(String sourceFile, String dirFile) {
		try {
			String json = readAllLines(new File(sourceFile));
			StringBuilder texts = new StringBuilder();
			appendStatusTexts(json, texts);
			writeText(dirFile, texts.toString(), false);
		} catch (IOException e) {
			// Also covers FileNotFoundException (a subclass of IOException).
			e.printStackTrace();
		} catch (JSONException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Parses a "Statuses" JSON array given as a string and returns the
	 * {@code text} of every element, one per line.
	 *
	 * <p>If parsing fails part-way, the stack trace is printed and the lines
	 * built from the elements processed so far are returned.
	 *
	 * @param str JSON array text to analyse
	 * @return the extracted text lines, each terminated by {@code "\n"}
	 */
	public String statusesTextStr(String str) {
		StringBuilder texts = new StringBuilder();
		try {
			appendStatusTexts(str, texts);
		} catch (JSONException e) {
			e.printStackTrace();
		}
		return texts.toString();
	}

	/** Appends one {@code id<TAB>name<TAB>status.text} line per element of the JSON array to {@code out}. */
	private static void appendUserRecords(String json, StringBuilder out) throws JSONException {
		JSONArray users = new JSONArray(json);
		for (int i = 0; i < users.length(); i++) {
			JSONObject user = users.getJSONObject(i);
			// Read every field before appending so a failing element never
			// leaves a half-written line in the output.
			String id = user.getString("id");
			String name = user.getString("name");
			String statusText = user.getJSONObject("status").getString("text");
			out.append(id).append('\t').append(name).append('\t').append(statusText).append('\n');
		}
	}

	/** Appends the {@code text} field of each element of the JSON array to {@code out}, one per line. */
	private static void appendStatusTexts(String json, StringBuilder out) throws JSONException {
		JSONArray statuses = new JSONArray(json);
		for (int i = 0; i < statuses.length(); i++) {
			String text = statuses.getJSONObject(i).getString("text");
			out.append(text).append('\n');
		}
	}

	/**
	 * Returns the content of {@code file} with all lines concatenated; line
	 * terminators are dropped and nothing is inserted between lines. The file
	 * is decoded with the platform default charset. The reader is always closed.
	 */
	private static String readAllLines(File file) throws IOException {
		BufferedReader reader = new BufferedReader(new FileReader(file));
		try {
			StringBuilder content = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line);
			}
			return content.toString();
		} finally {
			reader.close();
		}
	}

	/**
	 * Writes {@code content} to {@code path}, appending when {@code append} is
	 * true and overwriting otherwise. Text is encoded with the platform
	 * default charset. The writer is always closed.
	 */
	private static void writeText(String path, String content, boolean append) throws IOException {
		BufferedWriter writer = new BufferedWriter(new FileWriter(path, append));
		try {
			writer.write(content);
			writer.flush();
		} finally {
			writer.close();
		}
	}
}
